# export VLLM_PP_LAYER_PARTITION="16,15,15,15"
# vllm serve /share/fshare/common/models/deepseek-ai/DeepSeek-R1-awq \
# --pipeline-parallel-size 4 --tensor-parallel-size 4 \
# --trust-remote-code --max-model-len 8192 \
# --gpu-memory-utilization 0.95 \
# --max_num_seqs 1 \
# --quantization awq_marlin --host 0.0.0.0 --port 12345

#export VLLM_MLA_PERFORM_MATRIX_ABSORPTION=0 ## default: 1
#export VLLM_CUDA_MEM_ALIGN_KV_CACHE=0       ## default: 1


export NCCL_SOCKET_IFNAME=ens15f0
export GLOO_SOCKET_IFNAME=ens15f0
# export NCCL_SOCKET_IFNAME=ibs2
# export GLOO_SOCKET_IFNAME=ibs2
#export NCCL_IB_DISABLE=0

export VLLM_FORCE_NCCL_COMM=1
#export NCCL_IB_GID_INDEX=0
#export NCCL_IB_HCA=mlx5_0
export NCCL_DEBUG=INFO

vllm serve /home/kd/models/DeepSeek-R1-awq \
	--pipeline-parallel-size 4 --tensor_parallel_size 8 \
	--trust-remote-code --max-model-len 16384 \
	--gpu-memory-utilization 0.94 \
	--quantization awq_marlin --host 0.0.0.0 --port 12345 --dtype float16

